## 29.4.1 代码理解概述
代码理解模块是编程 Agent 的另一个核心能力,它能够分析、解释和理解现有代码的功能、结构和设计。代码理解涉及代码解析、语义分析、依赖分析等多个环节。
### 代码理解流程
输入代码 → 代码解析 → 结构分析 → 语义分析 → 依赖分析 → 功能推断 → 生成解释
## 29.4.2 代码解析
### 代码解析器
class CodeParser:
    """Dispatch source code to the parser registered for its language."""

    def __init__(self):
        # One parser instance per supported language, keyed by lowercase name.
        self.parsers = {
            'python': PythonParser(),
            'javascript': JavaScriptParser(),
            'java': JavaParser(),
            'cpp': CppParser()
        }

    def parse(self, code: str, language: str) -> ParsedCode:
        """Parse ``code`` with the parser registered for ``language``.

        Args:
            code: Source text to parse.
            language: Language name; matched case-insensitively against the
                keys of ``self.parsers``.

        Returns:
            Whatever the selected parser's ``parse`` method returns.

        Raises:
            ValueError: If no parser is registered for ``language``.
        """
        parser = self.parsers.get(language.lower())
        if parser is None:
            raise ValueError(f"Unsupported language: {language}")
        return parser.parse(code)


class PythonParser:
    """Extract classes, functions, imports and globals from Python source."""

    def parse(self, code: str) -> ParsedCode:
        """Parse Python source into a populated ``ParsedCode``.

        Args:
            code: Python source text.

        Returns:
            A ``ParsedCode`` carrying the AST plus the extracted classes,
            functions, imports and module-level variables.

        Raises:
            ValueError: If ``code`` is not syntactically valid Python; the
                original ``SyntaxError`` is chained as the cause.
        """
        # Only ast.parse can raise SyntaxError, so keep the try body minimal.
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            raise ValueError(f"Invalid Python code: {e}") from e

        parsed_code = ParsedCode(
            language='python',
            original_code=code,
            ast=tree
        )
        parsed_code.classes = self._extract_classes(tree)
        parsed_code.functions = self._extract_functions(tree)
        parsed_code.imports = self._extract_imports(tree)
        parsed_code.global_variables = self._extract_global_variables(tree)
        return parsed_code

    def _extract_classes(self, tree: ast.AST) -> List[ClassInfo]:
        """Return a ``ClassInfo`` for every class definition in ``tree``.

        Nested classes are included because the whole tree is walked. Only
        plain ``def`` methods and plain assignments directly in the class
        body are recorded.
        """
        classes = []
        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue

            # _extract_attribute returns None for targets that are not plain
            # names (e.g. ``a.b = 1``); drop those instead of storing None.
            candidates = (
                self._extract_attribute(stmt)
                for stmt in node.body
                if isinstance(stmt, ast.Assign)
            )
            attributes = [attr for attr in candidates if attr is not None]

            classes.append(ClassInfo(
                name=node.name,
                bases=[self._get_name(base) for base in node.bases],
                methods=[
                    self._extract_method(member)
                    for member in node.body
                    if isinstance(member, ast.FunctionDef)
                ],
                attributes=attributes,
                docstring=ast.get_docstring(node)
            ))
        return classes

    def _extract_functions(self, tree: ast.AST) -> List[FunctionInfo]:
        """Return a ``FunctionInfo`` for every function that is not a method.

        A function counts as a method when it sits directly in a class body.
        Functions nested inside other functions are still reported.
        """
        # Collect method nodes once so each function is an O(1) lookup
        # rather than a fresh walk over the whole tree.
        method_ids = {
            id(member)
            for owner in ast.walk(tree)
            if isinstance(owner, ast.ClassDef)
            for member in owner.body
        }

        functions = []
        for node in ast.walk(tree):
            if not isinstance(node, ast.FunctionDef):
                continue
            if id(node) in method_ids:
                continue  # skip methods defined in a class body
            functions.append(FunctionInfo(
                name=node.name,
                arguments=[arg.arg for arg in node.args.args],
                return_type=self._get_return_type(node),
                docstring=ast.get_docstring(node),
                decorators=[self._get_name(d) for d in node.decorator_list]
            ))
        return functions

    def _extract_imports(self, tree: ast.AST) -> List[ImportInfo]:
        """Return an ``ImportInfo`` for every imported name in ``tree``.

        ``import x`` statements produce entries of type ``'import'``;
        ``from m import x`` statements produce entries of type ``'from'``.
        """
        imports = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.extend(
                    ImportInfo(
                        module=alias.name,
                        alias=alias.asname,
                        type='import'
                    )
                    for alias in node.names
                )
            elif isinstance(node, ast.ImportFrom):
                # node.module is None for ``from . import x``.
                imports.extend(
                    ImportInfo(
                        module=node.module,
                        name=alias.name,
                        alias=alias.asname,
                        type='from'
                    )
                    for alias in node.names
                )
        return imports

    def _extract_global_variables(self, tree: ast.AST) -> List[VariableInfo]:
        """Return a ``VariableInfo`` for each module-level name assignment.

        Only assignments that are direct children of ``tree`` are considered,
        so assignments inside functions, classes or control flow are ignored.
        Every plain-name target of a chained assignment (``a = b = 1``) is
        reported.
        """
        variables = []
        # Direct children of the module are exactly its top-level statements.
        for stmt in ast.iter_child_nodes(tree):
            if not isinstance(stmt, ast.Assign):
                continue
            for target in stmt.targets:
                if isinstance(target, ast.Name):
                    variables.append(VariableInfo(
                        name=target.id,
                        type=self._infer_type(stmt.value),
                        value=self._get_value(stmt.value)
                    ))
        return variables

    def _extract_method(self, node: ast.FunctionDef) -> MethodInfo:
        """Build a ``MethodInfo`` from a function definition in a class body.

        ``is_static`` / ``is_classmethod`` are detected only for the bare
        decorator names ``staticmethod`` and ``classmethod``.
        """
        bare_decorators = {
            d.id for d in node.decorator_list if isinstance(d, ast.Name)
        }
        return MethodInfo(
            name=node.name,
            arguments=[arg.arg for arg in node.args.args],
            return_type=self._get_return_type(node),
            docstring=ast.get_docstring(node),
            is_static='staticmethod' in bare_decorators,
            is_classmethod='classmethod' in bare_decorators
        )

    def _extract_attribute(self, node: ast.Assign) -> AttributeInfo:
        """Build an ``AttributeInfo`` from a class-body assignment.

        Only the first assignment target is inspected. Returns ``None`` when
        that target is not a plain name.
        """
        target = node.targets[0]
        if not isinstance(target, ast.Name):
            return None
        return AttributeInfo(
            name=target.id,
            type=self._infer_type(node.value),
            value=self._get_value(node.value)
        )

    def _get_name(self, node: ast.AST) -> str:
        """Return a source-like name for ``node``.

        Plain names yield their identifier and attribute chains yield a
        dotted path. Any other expression (subscripts such as ``List[int]``,
        calls, constants such as ``None``) is rendered with ``ast.unparse``
        when the running interpreter provides it.
        """
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            return f"{self._get_name(node.value)}.{node.attr}"
        # ast.unparse is absent on older interpreters; fall back to str()
        # there so the method still returns a string.
        unparse = getattr(ast, "unparse", None)
        if unparse is not None:
            return unparse(node)
        return str(node)

    def _get_return_type(self, node: ast.FunctionDef) -> str:
        """Return the return annotation as text, or ``"None"`` if absent."""
        if node.returns is not None:
            return self._get_name(node.returns)
        return "None"

    def _infer_type(self, node: ast.AST) -> str:
        """Infer a type name from the shape of a value expression.

        Constants yield the Python type name of their value, container
        literals yield the container name, calls yield the callee name, and
        everything else yields ``"Any"``.
        """
        if isinstance(node, ast.Constant):
            return type(node.value).__name__
        if isinstance(node, ast.List):
            return "list"
        if isinstance(node, ast.Dict):
            return "dict"
        if isinstance(node, ast.Tuple):
            return "tuple"
        if isinstance(node, ast.Set):
            return "set"
        if isinstance(node, ast.Call):
            return self._get_name(node.func)
        return "Any"

    def _get_value(self, node: ast.AST) -> Any:
        """Return the literal value of a constant node, otherwise ``None``."""
        if isinstance(node, ast.Constant):
            return node.value
        return None


# ---- 29.4.3 Structure analysis: structure analyzer ----

class StructureAnalyzer:
    """Derive hierarchy, call graph, dependencies and complexity metrics."""

    def analyze(self, parsed_code: ParsedCode) -> StructureAnalysis:
        """Run every structural analysis and collect the results.

        Args:
            parsed_code: Parser output; its ``classes``, ``functions``,
                ``imports``, ``language``, ``ast`` and ``original_code``
                attributes are read.

        Returns:
            A ``StructureAnalysis`` with ``class_hierarchy``, ``call_graph``,
            ``dependencies`` and ``complexity`` filled in.
        """
        analysis = StructureAnalysis()
        analysis.class_hierarchy = self._analyze_class_hierarchy(
            parsed_code.classes
        )
        analysis.call_graph = self._analyze_call_graph(parsed_code)
        analysis.dependencies = self._analyze_dependencies(parsed_code)
        analysis.complexity = self._analyze_complexity(parsed_code)
        return analysis

    def _analyze_class_hierarchy(self, classes: List[ClassInfo]) -> Dict[str, List[str]]:
        """Map each class name to its list of base-class names.

        If two classes share a name, the later one wins.
        """
        return {cls.name: cls.bases for cls in classes}

    def _analyze_call_graph(self, parsed_code: ParsedCode) -> Dict[str, List[str]]:
        """Map each function and ``Class.method`` name to the calls it makes."""
        call_graph = {}

        # Free functions are keyed by their bare name.
        for func in parsed_code.functions:
            call_graph[func.name] = self._extract_function_calls(
                func, parsed_code
            )

        # Methods are keyed as "ClassName.method_name".
        for cls in parsed_code.classes:
            for method in cls.methods:
                call_graph[f"{cls.name}.{method.name}"] = (
                    self._extract_method_calls(method, cls, parsed_code)
                )

        return call_graph

    def _extract_function_calls(self, func: FunctionInfo, parsed_code: ParsedCode) -> List[str]:
        """Return the calls made by ``func``.

        Placeholder: always returns an empty list. A real implementation
        needs AST analysis of the function body.
        """
        calls = []
        return calls

    def _extract_method_calls(self, method: MethodInfo, cls: ClassInfo, parsed_code: ParsedCode) -> List[str]:
        """Return the calls made by ``method``.

        Placeholder: always returns an empty list. A real implementation
        needs AST analysis of the method body.
        """
        calls = []
        return calls

    def _analyze_dependencies(self, parsed_code: ParsedCode) -> List[Dependency]:
        """List import and inheritance dependencies found in ``parsed_code``."""
        dependencies = []

        # Import dependencies: the source side is recorded as the language
        # name, the target as the imported module.
        for imp in parsed_code.imports:
            dependencies.append(Dependency(
                type='import',
                source=parsed_code.language,
                target=imp.module,
                strength='external'
            ))

        # Inheritance dependencies: one entry per (class, base) pair.
        for cls in parsed_code.classes:
            for base in cls.bases:
                dependencies.append(Dependency(
                    type='inheritance',
                    source=cls.name,
                    target=base,
                    strength='strong'
                ))

        return dependencies

    def _analyze_complexity(self, parsed_code: ParsedCode) -> ComplexityMetrics:
        """Compute cyclomatic, cognitive and maintainability metrics."""
        metrics = ComplexityMetrics()

        # Cyclomatic complexity
        metrics.cyclomatic_complexity = self._calculate_cyclomatic_complexity(
            parsed_code
        )

        # Cognitive complexity
        metrics.cognitive_complexity = self._calculate_cognitive_complexity(
            parsed_code
        )

        # Maintainability index
        metrics.maintainability_index = self._calculate_maintainability_index(
            parsed_code
        )

        return metrics

    def _calculate_cyclomatic_complexity(self, parsed_code: ParsedCode) -> float:
        """Count decision points over the whole AST, starting from 1.

        ``if``, ``while``, ``for`` and ``except`` handlers each add one; a
        boolean operation adds one per extra operand.
        """
        complexity = 1  # base complexity

        # Walk the AST and count decision points.
        for node in ast.walk(parsed_code.ast):
            if isinstance(node, (ast.If, ast.While, ast.For, ast.ExceptHandler)):
                complexity += 1
            elif isinstance(node, ast.BoolOp):
                complexity += len(node.values) - 1

        return complexity

    def _calculate_cognitive_complexity(self, parsed_code: ParsedCode) -> float:
        """Approximate cognitive complexity as 1.5x cyclomatic complexity."""
        # Simplified implementation.
        return self._calculate_cyclomatic_complexity(parsed_code) * 1.5

    def _calculate_maintainability_index(self, parsed_code: ParsedCode) -> float:
        """Return a simplified maintainability index clamped to [0, 100]."""
        # Simplified implementation.
        loc = len(parsed_code.original_code.split('\n'))
        complexity = self._calculate_cyclomatic_complexity(parsed_code)

        # MI = 171 - 5.2 * ln(V) - 0.23 * G - 16.2 * ln(L)
        # Here V is the cyclomatic complexity and both G and L are the line
        # count.
        # NOTE(review): the usual definition takes V as Halstead volume and
        # G as cyclomatic complexity; with G = line count the 0.23 * G term
        # grows linearly with file length - confirm this is intended.
        # Both logarithms are safe: loc >= 1 and complexity >= 1.
        mi = 171 - 5.2 * math.log(complexity) - 0.23 * loc - 16.2 * math.log(loc)
        return max(0, min(100, mi))
# ---- 29.4.4 Semantic analysis: semantic analyzer ----

class SemanticAnalyzer:
    """Combine LLM queries and simple heuristics to describe code semantics."""

    def __init__(self, llm_client: LLMClient):
        # Client whose awaitable ``complete(prompt)`` answers each prompt.
        self.llm_client = llm_client

    async def analyze(self, parsed_code: ParsedCode, structure: StructureAnalysis) -> SemanticAnalysis:
        """Run every semantic analysis and collect the results.

        Args:
            parsed_code: Parser output for the code under analysis.
            structure: Structural analysis of the same code; its
                ``class_hierarchy`` is used for design-pattern detection.

        Returns:
            A ``SemanticAnalysis`` with ``purpose``, ``algorithms``,
            ``design_patterns`` and ``data_flow`` filled in.
        """
        analysis = SemanticAnalysis()
        analysis.purpose = await self._analyze_purpose(parsed_code)
        analysis.algorithms = await self._analyze_algorithms(parsed_code)
        analysis.design_patterns = await self._analyze_design_patterns(
            parsed_code, structure
        )
        analysis.data_flow = await self._analyze_data_flow(parsed_code)
        return analysis

    async def _analyze_purpose(self, parsed_code: ParsedCode) -> str:
        """Ask the LLM for a concise description of what the code does."""
        prompt = f"""
        分析以下代码的主要目的和功能:
        {parsed_code.original_code}
        请用简洁的语言描述这段代码的主要功能。
        """
        return await self.llm_client.complete(prompt)

    async def _analyze_algorithms(self, parsed_code: ParsedCode) -> List[AlgorithmInfo]:
        """Ask the LLM which algorithms the code uses and parse the answer."""
        prompt = f"""
        识别以下代码中使用的算法:
        {parsed_code.original_code}
        请识别:
        1. 使用的主要算法(排序、搜索、图算法等)
        2. 算法的时间复杂度
        3. 算法的空间复杂度
        以 JSON 格式返回结果。
        """
        response = await self.llm_client.complete(prompt)
        # NOTE(review): _parse_algorithms is not defined in the visible part
        # of this class - confirm it is provided before this path is used.
        return self._parse_algorithms(response)

    async def _analyze_design_patterns(self, parsed_code: ParsedCode, structure: StructureAnalysis) -> List[str]:
        """Ask the LLM which design patterns the code uses and parse the answer."""
        prompt = f"""
        识别以下代码中使用的设计模式:
        类:{parsed_code.classes}
        函数:{parsed_code.functions}
        类层次结构:{structure.class_hierarchy}
        请识别使用的设计模式。
        """
        response = await self.llm_client.complete(prompt)
        # NOTE(review): _parse_design_patterns is not defined in the visible
        # part of this class - confirm it is provided before this path is used.
        return self._parse_design_patterns(response)

    async def _analyze_data_flow(self, parsed_code: ParsedCode) -> DataFlowAnalysis:
        """Collect inputs, outputs and transformations without calling the LLM."""
        analysis = DataFlowAnalysis()
        analysis.inputs = self._identify_inputs(parsed_code)
        analysis.outputs = self._identify_outputs(parsed_code)
        analysis.transformations = self._identify_transformations(parsed_code)
        return analysis

    def _identify_inputs(self, parsed_code: ParsedCode) -> List[str]:
        """Return the distinct argument names of all functions and methods.

        Names appear in first-seen order (functions first, then methods), so
        the result is the same on every run.
        """
        inputs = []

        # Function arguments
        for func in parsed_code.functions:
            inputs.extend(func.arguments)

        # Method arguments
        for cls in parsed_code.classes:
            for method in cls.methods:
                inputs.extend(method.arguments)

        # dict.fromkeys removes duplicates while keeping insertion order;
        # a set would make the order depend on string hashing.
        return list(dict.fromkeys(inputs))

    def _identify_outputs(self, parsed_code: ParsedCode) -> List[str]:
        """Describe every function and method whose return type is not ``"None"``."""
        outputs = []

        # Function return values
        for func in parsed_code.functions:
            if func.return_type != "None":
                outputs.append(f"{func.name}() -> {func.return_type}")

        # Method return values
        for cls in parsed_code.classes:
            for method in cls.methods:
                if method.return_type != "None":
                    outputs.append(f"{cls.name}.{method.name}() -> {method.return_type}")

        return outputs

    def _identify_transformations(self, parsed_code: ParsedCode) -> List[str]:
        """Return names of functions that look like data transformations.

        Simplified heuristic: a function qualifies when its lowercased name
        contains one of a few keywords. Methods are not considered.
        """
        keywords = ('transform', 'convert', 'process', 'compute')
        return [
            func.name
            for func in parsed_code.functions
            if any(keyword in func.name.lower() for keyword in keywords)
        ]


# ---- 29.4.5 Code explanation generation: explanation generator ----

class ExplanationGenerator:
    """Turn parse, structure and semantic results into LLM-written explanations."""

    def __init__(self, llm_client: LLMClient):
        # Client whose awaitable ``complete(prompt)`` answers each prompt.
        self.llm_client = llm_client

    async def generate_explanation(self, parsed_code: ParsedCode, structure: StructureAnalysis, semantic: SemanticAnalysis) -> CodeExplanation:
        """Generate every part of the explanation, one LLM call at a time.

        Args:
            parsed_code: Parser output for the code being explained.
            structure: Structural analysis of the same code.
            semantic: Semantic analysis; ``purpose`` and ``algorithms`` are read.

        Returns:
            A ``CodeExplanation`` with ``overview``, ``class_explanations``,
            ``function_explanations``, ``algorithm_explanations`` and
            ``usage_examples`` filled in.
        """
        explanation = CodeExplanation()
        explanation.overview = await self._generate_overview(
            parsed_code, semantic
        )
        explanation.class_explanations = await self._generate_class_explanations(
            parsed_code.classes, structure
        )
        explanation.function_explanations = await self._generate_function_explanations(
            parsed_code.functions, structure
        )
        explanation.algorithm_explanations = await self._generate_algorithm_explanations(
            semantic.algorithms
        )
        explanation.usage_examples = await self._generate_usage_examples(
            parsed_code
        )
        return explanation

    async def _generate_overview(self, parsed_code: ParsedCode, semantic: SemanticAnalysis) -> str:
        """Ask the LLM for an overall summary of the code."""
        prompt = f"""
        为以下代码生成总体概述:
        代码目的:{semantic.purpose}
        类:{[c.name for c in parsed_code.classes]}
        函数:{[f.name for f in parsed_code.functions]}
        请生成一个清晰的总体概述,包括:
        1. 代码的主要功能
        2. 主要组件
        3. 整体架构
        """
        return await self.llm_client.complete(prompt)

    async def _generate_class_explanations(self, classes: List[ClassInfo], structure: StructureAnalysis) -> Dict[str, str]:
        """Return one LLM explanation per class, keyed by class name.

        ``structure`` is accepted but not used.
        """
        explanations = {}
        for cls in classes:
            prompt = f"""
        为以下类生成详细解释:
        类名:{cls.name}
        父类:{cls.bases}
        方法:{[m.name for m in cls.methods]}
        属性:{[a.name for a in cls.attributes if a]}
        文档字符串:{cls.docstring}
        请生成详细的类解释,包括:
        1. 类的职责
        2. 主要方法的功能
        3. 使用场景
        """
            explanations[cls.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_function_explanations(self, functions: List[FunctionInfo], structure: StructureAnalysis) -> Dict[str, str]:
        """Return one LLM explanation per function, keyed by function name.

        ``structure`` is accepted but not used. Functions sharing a name
        overwrite each other's entry.
        """
        explanations = {}
        for func in functions:
            prompt = f"""
        为以下函数生成详细解释:
        函数名:{func.name}
        参数:{func.arguments}
        返回类型:{func.return_type}
        文档字符串:{func.docstring}
        请生成详细的函数解释,包括:
        1. 函数的功能
        2. 参数说明
        3. 返回值说明
        4. 使用示例
        """
            explanations[func.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_algorithm_explanations(self, algorithms: List[AlgorithmInfo]) -> Dict[str, str]:
        """Return one LLM explanation per algorithm, keyed by algorithm name."""
        explanations = {}
        for algo in algorithms:
            prompt = f"""
        为以下算法生成详细解释:
        算法名称:{algo.name}
        时间复杂度:{algo.time_complexity}
        空间复杂度:{algo.space_complexity}
        请生成详细的算法解释,包括:
        1. 算法原理
        2. 实现细节
        3. 优缺点分析
        4. 适用场景
        """
            explanations[algo.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_usage_examples(self, parsed_code: ParsedCode) -> List[str]:
        """Return LLM usage examples for every class and the first three functions."""
        examples = []

        # One example request per class.
        for cls in parsed_code.classes:
            prompt = f"""
        为以下类生成使用示例:
        类名:{cls.name}
        方法:{[m.name for m in cls.methods]}
        请生成 2-3 个实用的使用示例。
        """
            examples.append(await self.llm_client.complete(prompt))

        # Only the first three functions get examples.
        for func in parsed_code.functions[:3]:
            prompt = f"""
        为以下函数生成使用示例:
        函数名:{func.name}
        参数:{func.arguments}
        请生成 1-2 个实用的使用示例。
        """
            examples.append(await self.llm_client.complete(prompt))

        return examples